ohi logo
OHI Science | Citation policy

Overview

This layer is derived from EPA Beach Closure data. We use the number of days a beach is closed per year due to pathogens in the water as a proxy for the impact of pathogens in coastal waters. Data is provided at the beach level, aggregated to county and then again aggregated to the region level.

The highest observed pressure score was 9.5% in Rhode Island. Indicating, on average a given beach would be closed 9.5% of the year. This high result was largely driven to the Atlantic Beach Club beach being closed more than 80 days in 2006.

knitr::opts_chunk$set(fig.width = 7, fig.height = 7,message = FALSE, warning = FALSE)

source('~/github/ohi-northeast/src/R/common.R')

library(tidyverse)
library(plotly)

Data Wrangling

# data for all states exept New York
df <- read_csv('data/beach_actions_(advisories_and_closures).csv')

New York Beaches Data

I downloaded the New York beaches dataset on its own. We have to filter out beaches from the great lakes and finger lakes regions.

ny <- read_csv('data/beach_actions_(advisories_and_closures)_NY.csv')%>%
      filter(County %in% c('BRONX','QUEENS','KINGS','SUFFOLK','NASSAU','RICHMOND','WESTCHESTER')) #ocean counties

Split Massachusets between two regions

Since Massachussetts has state waters divided into two regions, we have to manually assign counties to the Gulf of Maine and Virginian regions

split <- df%>%
          filter(County %in% c('BARNSTABLE','PLYMOUTH'))%>%
          select(State,County,`Beach Name`)%>%
          unique()
nrow(split)
## [1] 197
write.csv(split,file = 'data/ma_beaches.csv')

There are 197 beaches in these two counties that require manual matching. Using the BEACON map viewer, I matched each beach in Plymouth and Barnstable Counties to either Region 4 or 5.

Now combine all beaches to rgn_ids

mass_bch <- read_csv('data/ma_beaches_rgn_id.csv')[,1:4]
mass_cnty <- read_csv('~/github/ohi-northeast/src/tables/MA_counties.csv')[,2:4]
mass_cnty$County = toupper(mass_cnty$County)

df_pb <- df%>%
         filter(State == 'MA' & County %in% c('BARNSTABLE','PLYMOUTH'))%>%
         left_join(mass_bch,by = c('State','County','Beach Name'))%>%
         select(State,County,Year,`Beach Name`,ActionStartDate,ActionEndDate,`ActionDuration Days`,rgn_id)

df_ma <- df%>%
        filter(State == 'MA'&
               !County %in% c('BARNSTABLE','PLYMOUTH'))%>%
        left_join(mass_cnty,by = 'County')%>%
         select(State,County,Year,`Beach Name`,ActionStartDate,ActionEndDate,`ActionDuration Days`,rgn_id)

#adding State column to rgn_data
rgn_data$State = as.character(rgn_data$rgn_abrev)

#matching rgn_id to non MASS data
df_rest <- df%>%
          rbind(ny)%>% #add in New York
          filter(State != 'MA')%>%
          left_join(rgn_data,by = 'State')%>%
          select(State,County,Year,`Beach Name`,ActionStartDate,ActionEndDate,`ActionDuration Days`,rgn_id)

df_all = rbind(df_pb,df_ma,df_rest)%>%
         left_join(rgn_data,by = 'rgn_id')%>%
         select(-State.y,-rgn_abrev)

Explore Data

library(trelliscopejs)

#percent of days beach is closed by county 
perc <- df_all%>%
        unique()%>% #some weird duplicates
        group_by(`Beach Name`, Year, County, rgn_id,rgn_name)%>%
        summarize(days_closed = sum(`ActionDuration Days`))%>%
        mutate(perc_closed = days_closed/365)%>%
        ungroup()%>%
        group_by(County, rgn_id, Year,rgn_name)%>%
        summarize(perc_closed = mean(perc_closed))

ggplot(perc,aes(x = Year,y = perc_closed,color = `County`))+
  geom_line()+
   trelliscopejs::facet_trelliscope(~rgn_name,self_contained = TRUE)

Aggregate to regions

The mean annual proportion a given beach is closed per year in each region is calculated and plotted below.

st <- perc%>%
      group_by(rgn_name,rgn_id,Year)%>%
      summarize(m = mean(perc_closed)*100)

g <- ggplot(st,aes(x = Year,y = m,color = rgn_name))+
  geom_line()+
  labs(title = "Beach Closures (mean annual proportion any given beach is closed)",
       y = "Proportion (%) of year")
ggplotly(g)

Rescale

Scores are rescaled using the maximum value across regions, across all time. The reference value is 9.62%.

scores <- st%>%
       mutate(score = m/max(.$m))

Results

Each region is scored based on the mean annual proportion any given beach within a county is closed - aggregated to regional level.

## scores through time

time_plot <- ggplot(scores,aes(x = Year,y = score, col = rgn_name))+
  geom_line() +
  labs(title = "Pressure Layer: Chemicals",
       x = "Year",
       y = "Pressure Score",
       colour = "Region")
ggplotly(time_plot)

Pressure Scores for 2016

map_scores(scores%>%filter(Year==2016),
           scale_label = 'Pressure Score',
           map_title = "2016",
           rev_col = T)

out <- scores%>%
        select(rgn_name,rgn_id,year=Year,score)

#write.csv(out,file = 'scores/pathogens.csv')

TODO

Gapfilling

We’ll need to gapfill over time